# Using Generation 1 data (1세대 데이터를 이용)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the Pokemon dataset from Pokemon.csv in the current working directory.
df = pd.read_csv("./Pokemon.csv")
# Bare expression: rendered as the notebook cell's output.
df
| # | Name | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Generation | Legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Bulbasaur | Grass | Poison | 318 | 45 | 49 | 49 | 65 | 65 | 45 | 1 | False |
| 1 | 2 | Ivysaur | Grass | Poison | 405 | 60 | 62 | 63 | 80 | 80 | 60 | 1 | False |
| 2 | 3 | Venusaur | Grass | Poison | 525 | 80 | 82 | 83 | 100 | 100 | 80 | 1 | False |
| 3 | 3 | VenusaurMega Venusaur | Grass | Poison | 625 | 80 | 100 | 123 | 122 | 120 | 80 | 1 | False |
| 4 | 4 | Charmander | Fire | NaN | 309 | 39 | 52 | 43 | 60 | 50 | 65 | 1 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 795 | 719 | Diancie | Rock | Fairy | 600 | 50 | 100 | 150 | 100 | 150 | 50 | 6 | True |
| 796 | 719 | DiancieMega Diancie | Rock | Fairy | 700 | 50 | 160 | 110 | 160 | 110 | 110 | 6 | True |
| 797 | 720 | HoopaHoopa Confined | Psychic | Ghost | 600 | 80 | 110 | 60 | 150 | 130 | 70 | 6 | True |
| 798 | 720 | HoopaHoopa Unbound | Psychic | Dark | 680 | 80 | 160 | 60 | 170 | 130 | 80 | 6 | True |
| 799 | 721 | Volcanion | Fire | Water | 600 | 80 | 110 | 120 | 130 | 90 | 70 | 6 | True |
800 rows × 13 columns
# Keep only the rows whose 'Generation' column equals 1.
is_first_generation = df['Generation'] == 1
df_g1 = df[is_first_generation]
df_g1
| # | Name | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Generation | Legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Bulbasaur | Grass | Poison | 318 | 45 | 49 | 49 | 65 | 65 | 45 | 1 | False |
| 1 | 2 | Ivysaur | Grass | Poison | 405 | 60 | 62 | 63 | 80 | 80 | 60 | 1 | False |
| 2 | 3 | Venusaur | Grass | Poison | 525 | 80 | 82 | 83 | 100 | 100 | 80 | 1 | False |
| 3 | 3 | VenusaurMega Venusaur | Grass | Poison | 625 | 80 | 100 | 123 | 122 | 120 | 80 | 1 | False |
| 4 | 4 | Charmander | Fire | NaN | 309 | 39 | 52 | 43 | 60 | 50 | 65 | 1 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 161 | 149 | Dragonite | Dragon | Flying | 600 | 91 | 134 | 95 | 100 | 100 | 80 | 1 | False |
| 162 | 150 | Mewtwo | Psychic | NaN | 680 | 106 | 110 | 90 | 154 | 90 | 130 | 1 | True |
| 163 | 150 | MewtwoMega Mewtwo X | Psychic | Fighting | 780 | 106 | 190 | 100 | 154 | 100 | 130 | 1 | True |
| 164 | 150 | MewtwoMega Mewtwo Y | Psychic | NaN | 780 | 106 | 150 | 70 | 194 | 120 | 140 | 1 | True |
| 165 | 151 | Mew | Psychic | NaN | 600 | 100 | 100 | 100 | 100 | 100 | 100 | 1 | False |
166 rows × 13 columns
# Count the missing values in each column of the Generation 1 subset.
df_g1.isnull().sum()
# 0 Name 0 Type 1 0 Type 2 88 Total 0 HP 0 Attack 0 Defense 0 Sp. Atk 0 Sp. Def 0 Speed 0 Generation 0 Legendary 0 dtype: int64
# Number of Generation 1 rows whose Name contains the substring 'Mega'.
df_g1['Name'].str.contains('Mega').sum()
15
# Drop the rows whose Name contains 'Mega' so that only the base forms remain.
# `~mask` is the idiomatic negation of a boolean Series (instead of
# `== False`), and `.copy()` makes g1_df an independent frame so that adding
# columns to it later does not write into a slice of df_g1.
is_mega = df_g1['Name'].str.contains('Mega')
g1_df = df_g1[~is_mega].copy()
g1_df
| # | Name | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Generation | Legendary | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Bulbasaur | Grass | Poison | 318 | 45 | 49 | 49 | 65 | 65 | 45 | 1 | False |
| 1 | 2 | Ivysaur | Grass | Poison | 405 | 60 | 62 | 63 | 80 | 80 | 60 | 1 | False |
| 2 | 3 | Venusaur | Grass | Poison | 525 | 80 | 82 | 83 | 100 | 100 | 80 | 1 | False |
| 4 | 4 | Charmander | Fire | NaN | 309 | 39 | 52 | 43 | 60 | 50 | 65 | 1 | False |
| 5 | 5 | Charmeleon | Fire | NaN | 405 | 58 | 64 | 58 | 80 | 65 | 80 | 1 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 159 | 147 | Dratini | Dragon | NaN | 300 | 41 | 64 | 45 | 50 | 50 | 50 | 1 | False |
| 160 | 148 | Dragonair | Dragon | NaN | 420 | 61 | 84 | 65 | 70 | 70 | 70 | 1 | False |
| 161 | 149 | Dragonite | Dragon | Flying | 600 | 91 | 134 | 95 | 100 | 100 | 80 | 1 | False |
| 162 | 150 | Mewtwo | Psychic | NaN | 680 | 106 | 110 | 90 | 154 | 90 | 130 | 1 | True |
| 165 | 151 | Mew | Psychic | NaN | 600 | 100 | 100 | 100 | 100 | 100 | 100 | 1 | False |
151 rows × 13 columns
# Print the index range, column dtypes and non-null counts of g1_df.
g1_df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 151 entries, 0 to 165 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 # 151 non-null int64 1 Name 151 non-null object 2 Type 1 151 non-null object 3 Type 2 67 non-null object 4 Total 151 non-null int64 5 HP 151 non-null int64 6 Attack 151 non-null int64 7 Defense 151 non-null int64 8 Sp. Atk 151 non-null int64 9 Sp. Def 151 non-null int64 10 Speed 151 non-null int64 11 Generation 151 non-null int64 12 Legendary 151 non-null bool dtypes: bool(1), int64(9), object(3) memory usage: 13.7+ KB
def types(data, n_type, name, title):
    """Draw a pie chart of how many rows fall into each category of a column.

    Args:
        data: DataFrame to summarise.
        n_type: Column to group by (e.g. ``'Type 1'``).
        name: Column whose non-null entries are counted inside each group.
        title: Title displayed above the chart.
    """
    pokemon_dist = data.groupby(n_type)[name].count()
    labels = pokemon_dist.index
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.pie(
        x=pokemon_dist,
        labels=labels,
        autopct='%.1f%%',
        pctdistance=0.5,
        # One small offset per wedge so that the slices are pulled apart.
        explode=[0.05] * len(labels),
    )
    ax.set_title(title)
    plt.show()


types(data=g1_df, n_type='Type 1', name='Name', title='Pokemon Type 1 Distribution')
# This chart groups by 'Type 2', so the title has to say 'Type 2' as well.
types(data=g1_df, n_type='Type 2', name='Name', title='Pokemon Type 2 Distribution')
# Columns for which the maximum value and its holder(s) are reported.
status = ['Total', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
print('Generation ', 1, ' Pokemon Max Status')
for stat in status:
    top_value = g1_df[stat].max()
    # Every Pokemon that reaches the maximum is listed, not just the first.
    holders = g1_df.loc[g1_df[stat] == top_value, 'Name']
    print('Max ', stat, ' : ', holders.values, top_value)
Generation 1 Pokemon Max Status Max Total : ['Mewtwo'] 680 Max Attack : ['Dragonite'] 134 Max Defense : ['Cloyster'] 180 Max Sp. Atk : ['Mewtwo'] 154 Max Sp. Def : ['Articuno'] 125 Max Speed : ['Electrode'] 140
# Work on an explicit copy instead of disabling pandas' chained-assignment
# warning for the whole session: the copy removes the cause of the warning,
# and the warning stays available for real mistakes elsewhere.
g1_df = g1_df.copy()
# Combined type label; the result is NaN whenever 'Type 2' is missing.
g1_df['Type 1 + Type 2'] = g1_df['Type 1'] + ' ' + g1_df['Type 2']
# 'Total' divided by 6 (presumably the six base stats), rounded to 1 decimal.
g1_df['Average'] = (g1_df['Total'] / 6).round(1)
def status(color):
    """Plot the distribution of each Generation 1 stat as a grid of histograms.

    Reads the module-level ``g1_df`` and draws one density histogram with a
    KDE curve per column on a 4 x 2 grid.

    NOTE: defining this function rebinds the module-level name ``status``,
    which earlier in the file held the list of stat column names.

    Args:
        color: Matplotlib Tableau colour name without the ``tab:`` prefix
            (for example ``'blue'``).
    """
    columns = ['Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Average']
    tab_color = 'tab:' + color
    # plt.figure rather than plt.subplots: subplots would add a full-figure
    # Axes that newer Matplotlib no longer removes when plt.subplot places
    # overlapping Axes, leaving an empty frame behind the grid.
    plt.figure(figsize=(20, 20))
    print('Generation ', 1, ' Pokemon Status Distribution')
    for position, column in enumerate(columns, start=1):
        plt.subplot(4, 2, position)
        sns.histplot(g1_df[column], bins=10, color=tab_color, kde=True, stat="density", linewidth=0)


status('blue')
Generation 1 Pokemon Status Distribution
# Box plot of the remaining columns after removing the aggregate, flag and
# id columns.
excluded_columns = ['Total', 'Legendary', 'Generation', '#']
stats_df = g1_df.drop(columns=excluded_columns)
plt.figure(figsize=(15, 10))
sns.boxplot(data=stats_df)
plt.title('Generation 1 Pokemon Status Boxplot', size=14)
Text(0.5, 1.0, 'Generation 1 Pokemon Status Boxplot')
from math import pi
from matplotlib.path import Path
from matplotlib.spines import Spine
from matplotlib.transforms import Affine2D
# Count the Pokemon in every (Type 1, Type 2) pair. Missing values are first
# replaced by the string 'None' so that rows without a second type form
# their own group instead of being dropped by groupby.
type_pair_sizes = g1_df.fillna('None').groupby(['Type 1', 'Type 2']).size()
type1_2 = type_pair_sizes.reset_index(name='count')
type1_2.head()
| Type 1 | Type 2 | count | |
|---|---|---|---|
| 0 | Bug | Flying | 2 |
| 1 | Bug | Grass | 2 |
| 2 | Bug | None | 3 |
| 3 | Bug | Poison | 5 |
| 4 | Dragon | Flying | 1 |
import plotly.express as px

# Treemap of the type combinations: 'Type 1' is the outer level, 'Type 2' the
# inner level, and each tile is sized by the 'count' column.
fig = px.treemap(
    type1_2,
    path=['Type 1', 'Type 2'],
    names='Type 2',
    values='count',
    color_discrete_sequence=px.colors.qualitative.Set1,
    width=900,
    height=600,
)
fig.show()
# Number of Pokemon per primary type, most frequent type first.
type1_order = g1_df['Type 1'].value_counts().index
plt.figure(figsize=(15, 10))
sns.countplot(x='Type 1', data=g1_df, order=type1_order)
plt.xticks(rotation=45)
plt.show()

# Same chart for the secondary type; missing values are shown as 'None'.
filled_df = g1_df.fillna('None')
type2_order = filled_df['Type 2'].value_counts().index
plt.figure(figsize=(15, 10))
sns.countplot(x='Type 2', data=filled_df, order=type2_order)
plt.xticks(rotation=45)
plt.show()
# Column-wise mean of the numeric (and boolean) columns.
# numeric_only=True is required: g1_df also holds string columns, which older
# pandas dropped silently but pandas >= 2.0 rejects with a TypeError.
average_df = g1_df.mean(numeric_only=True)
average_df
# 76.000000 Total 407.079470 HP 64.211921 Attack 72.549669 Defense 68.225166 Sp. Atk 67.139073 Sp. Def 66.019868 Speed 68.933775 Generation 1.000000 Legendary 0.026490 Average 67.846358 dtype: float64
# Column-wise maximum. Columns that contain missing values mix strings with
# NaN and cannot be ordered; older pandas left them out of the result
# silently, while newer versions raise instead. Dropping those columns
# explicitly keeps the same result on both.
max_df = g1_df.dropna(axis=1).max()
max_df
# 151 Name Zubat Type 1 Water Total 680 HP 250 Attack 134 Defense 180 Sp. Atk 154 Sp. Def 125 Speed 140 Generation 1 Legendary True Average 113.3 dtype: object
# Highest 'Total' per Pokemon name, largest first.
g1_df.groupby('Name')['Total'].max().sort_values(ascending=False)
Name
Mewtwo 680
Dragonite 600
Mew 600
Articuno 580
Moltres 580
...
Metapod 205
Kakuna 205
Magikarp 200
Caterpie 195
Weedle 195
Name: Total, Length: 151, dtype: int64
# Bar chart of 'Total' per Pokemon, ordered from the highest to the lowest.
total_by_name = g1_df.groupby(['Name'])['Total'].mean().to_frame()
sorted_df = total_by_name.sort_values(by='Total', ascending=False)

# Very wide figure so that every name gets its own readable tick.
fig, ax = plt.subplots(1, 1, figsize=(50, 5))
sns.barplot(x=sorted_df.index, y='Total', data=sorted_df)
plt.xlabel('Name')
plt.xticks(fontsize=15, rotation=90)  # vertical labels because the names are long
plt.title("Total Status of Pokemon", fontsize=30)
plt.show()
status = ['Total', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']

# Highest Total: Mewtwo (뮤츠)
# The heading used to be fused into the code line, which made it a syntax
# error; it is now a comment and the two statements are on separate lines.
total = g1_df.groupby('Name')['Total'].max().sort_values(ascending=False)
total.index[0]
'Mewtwo'
# Highest HP: Chansey (럭키)
# The heading used to be fused into the code line, which made it a syntax
# error; it is now a comment.
hp = g1_df.groupby('Name')['HP'].max().sort_values(ascending=False)
hp.index[0]
'Chansey'
# Highest Attack: Dragonite (망나뇽)
# The heading used to be fused into the code line, which made it a syntax
# error; it is now a comment.
attack = g1_df.groupby('Name')['Attack'].max().sort_values(ascending=False)
attack.index[0]
'Dragonite'
# Highest Defense: Cloyster (파르셀)
# The heading used to be fused into the code line, which made it a syntax
# error; it is now a comment.
defense = g1_df.groupby('Name')['Defense'].max().sort_values(ascending=False)
defense.index[0]
'Cloyster'
# Highest 'Sp. Atk' per Pokemon name, largest first; the first index entry is
# the name at the top of that ranking.
spatk = g1_df.groupby('Name')['Sp. Atk'].max().sort_values(ascending=False)
spatk.index[0]
'Mewtwo'
# Highest 'Sp. Def' per Pokemon name, largest first; the first index entry is
# the name at the top of that ranking.
spdef = g1_df.groupby('Name')['Sp. Def'].max().sort_values(ascending=False)
spdef.index[0]
'Articuno'
# Highest Speed: Electrode (붐볼)
# The heading used to be fused into the code line, which made it a syntax
# error; it is now a comment.
speed = g1_df.groupby('Name')['Speed'].max().sort_values(ascending=False)
speed.index[0]
'Electrode'
# Stack the full rows of the top-ranked Pokemon for each stat, in the order
# Total, HP, Attack, Defense, Sp. Atk, Sp. Def, Speed. A Pokemon that leads
# several rankings appears once per ranking.
stat_rankings = (total, hp, attack, defense, spatk, spdef, speed)
max_p = pd.concat(
    [g1_df[g1_df['Name'] == ranking.index[0]] for ranking in stat_rankings]
)
max_p
| # | Name | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Generation | Legendary | Type 1 + Type 2 | Average | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 162 | 150 | Mewtwo | Psychic | NaN | 680 | 106 | 110 | 90 | 154 | 90 | 130 | 1 | True | NaN | 113.3 |
| 121 | 113 | Chansey | Normal | NaN | 450 | 250 | 5 | 5 | 35 | 105 | 50 | 1 | False | NaN | 75.0 |
| 161 | 149 | Dragonite | Dragon | Flying | 600 | 91 | 134 | 95 | 100 | 100 | 80 | 1 | False | Dragon Flying | 100.0 |
| 98 | 91 | Cloyster | Water | Ice | 525 | 50 | 95 | 180 | 85 | 45 | 70 | 1 | False | Water Ice | 87.5 |
| 162 | 150 | Mewtwo | Psychic | NaN | 680 | 106 | 110 | 90 | 154 | 90 | 130 | 1 | True | NaN | 113.3 |
| 156 | 144 | Articuno | Ice | Flying | 580 | 90 | 85 | 100 | 95 | 125 | 85 | 1 | True | Ice Flying | 96.7 |
| 109 | 101 | Electrode | Electric | NaN | 480 | 60 | 50 | 70 | 80 | 80 | 140 | 1 | False | NaN | 80.0 |
### Data preparation
# Build the radar-chart table from the data instead of hand-copied numbers:
# an 'Average' row, a 'Max_Each' row (per-stat maximum) and one row for each
# top-ranked Pokemon in max_p. Averages are rounded to whole numbers
# consistently; the hand-written table had truncated Attack (72.55 -> 72)
# while rounding the other stats.
# NOTE: this rebinds `df`, which earlier in the file held the raw dataset.
stat_cols = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
average_row = g1_df[stat_cols].mean().round().astype(int).to_dict()
max_row = g1_df[stat_cols].max().to_dict()
summary_rows = pd.DataFrame([
    {'Character': 'Average', **average_row},
    {'Character': 'Max_Each', **max_row},
])
holder_rows = max_p[['Name'] + stat_cols].rename(columns={'Name': 'Character'})
# ignore_index gives rows 0..n-1, which the chart loops use as positions.
df = pd.concat([summary_rows, holder_rows], ignore_index=True)
## Draw every character on its own radar chart
labels = df.columns[1:]
num_labels = len(labels)

# One evenly spaced angle per stat axis.
angles = [x / float(num_labels) * (2 * pi) for x in range(num_labels)]
angles += angles[:1]  # repeat the first angle so that the outline closes

# plt.get_cmap replaces plt.cm.get_cmap, which was deprecated and later
# removed from Matplotlib; the arguments are the same.
my_palette = plt.get_cmap("Set2", len(df.index))

fig = plt.figure(figsize=(15, 20))
fig.set_facecolor('white')

for i, row in df.iterrows():
    color = my_palette(i)
    # Stat values of this row, with the first one repeated to close the shape.
    data = row.drop('Character').tolist()
    data += data[:1]

    ax = plt.subplot(4, 3, i + 1, polar=True)
    ax.set_theta_offset(pi / 2)  # start at the top
    ax.set_theta_direction(-1)  # draw clockwise

    plt.xticks(angles[:-1], labels, fontsize=13)  # x-axis tick labels
    ax.tick_params(axis='x', which='major', pad=15)  # gap between axis and labels

    ax.set_rlabel_position(0)  # angle of the radial labels, in degrees
    plt.yticks([0, 50, 100, 150, 200, 250], ['0', '50', '100', '150', '200', '250'], fontsize=10)  # radial ticks
    plt.ylim(0, 250)

    ax.plot(angles, data, color=color, linewidth=2, linestyle='solid')  # outline
    ax.fill(angles, data, color=color, alpha=0.4)  # fill the inside of the shape

    plt.title(row.Character, size=20, color=color, x=-0.2, y=1.2, ha='left')  # title is the character name

plt.tight_layout(pad=5)  # padding between the subplots
plt.show()
## Combine all characters in one chart, with a polygon frame
labels = df.columns[1:]
num_labels = len(labels)

# One evenly spaced angle per stat axis.
angles = [x / float(num_labels) * (2 * pi) for x in range(num_labels)]
angles += angles[:1]  # repeat the first angle so that the outline closes

# plt.get_cmap replaces plt.cm.get_cmap, which was deprecated and later
# removed from Matplotlib; the arguments are the same.
my_palette = plt.get_cmap("Set2", len(df.index))

fig = plt.figure(figsize=(8, 8))
fig.set_facecolor('white')
ax = fig.add_subplot(polar=True)

# The axis setup does not depend on the row, so it is done once here instead
# of being repeated on every loop iteration.
ax.set_theta_offset(pi / 2)  # start at the top
ax.set_theta_direction(-1)  # draw clockwise
plt.xticks(angles[:-1], labels, fontsize=13)  # x-axis tick labels
ax.tick_params(axis='x', which='major', pad=15)  # gap between axis and labels
ax.set_rlabel_position(0)  # angle of the radial labels, in degrees
plt.yticks([0, 50, 100, 150, 200, 250], ['0', '50', '100', '150', '200', '250'], fontsize=10)  # radial ticks
plt.ylim(0, 250)

for i, row in df.iterrows():
    color = my_palette(i)
    # Stat values of this row, with the first one repeated to close the shape.
    data = row.drop('Character').tolist()
    data += data[:1]

    ax.plot(angles, data, color=color, linewidth=2, linestyle='solid', label=row.Character)  # outline
    ax.fill(angles, data, color=color, alpha=0.4)  # fill the inside of the shape

# Grid lines: relies on a private Matplotlib attribute; the intent appears to
# be that each radial grid line is drawn with one segment per label.
for g in ax.yaxis.get_gridlines():
    g.get_path()._interpolation_steps = len(labels)

spine = Spine(axes=ax,
              spine_type='circle',
              path=Path.unit_regular_polygon(len(labels)))

# Match the spine to the centre and radius of the Axes.
spine.set_transform(Affine2D().scale(.5).translate(.5, .5) + ax.transAxes)

ax.spines = {'polar': spine}  # replace the circular frame with the polygon

plt.legend(loc=(0.9, 0.9))
plt.show()